##Useful info for all rows
“round_won_by”: team that won the round; “match_id”: unique match identifier, e.g. “0 astralis-vs-cr4zy-m1-inferno”; “map_name”: name of the map played; “round_num”: number of the round, from 1 to the last round; “round_tot”: total number of rounds in the match; “match_event”: unique event identifier, e.g. “StarLadder Major Berlin 2019”; “match_date”: date of the match in milliseconds; “match_team1”: name of one team; “match_team2”: name of the other team.
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages -------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5 v purrr 0.3.4
v tibble 3.1.4 v dplyr 1.0.7
v tidyr 1.1.3 v stringr 1.4.0
v readr 2.0.1 v forcats 0.5.1
-- Conflicts ----------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
# Display the raw kill events.
kills
# First per-round draft of `wins`: for every (match, round), keep the map name
# and a side label taken from the last non-team-kill of the round
# ("T" when that attacker was on the T side, otherwise "CT").
wins <- kills %>%
  filter(attackerTeam != victimTeam) %>%
  group_by(match_id, round_num) %>%
  summarise(
    map = last(map_name),
    T_team = if_else(last(attackerSide) == "T", "T", "CT")
  )
`summarise()` has grouped output by 'match_id'. You can override using the `.groups` argument.
# Number of distinct matches in which each player appears as an attacker.
# The previous version collapsed every match to its *last* kill first, so it
# counted "matches where the player got the final kill" instead of matches played.
# NOTE(review): players who never got a kill in a match are not counted for it,
# because only the attacker columns of `kills` are used here.
most_played <- kills %>%
  filter(!is.na(attackerSteamID)) %>% # ignore kills with no attacking player
  group_by(attackerSteamID) %>%
  summarize(
    name = last(attackerName),
    match_count = n_distinct(match_id)
  )
# 30 players with the most matches, sorted in decreasing order (ties are kept).
most_played %>% slice_max(match_count, n = 30)
Selecting by match_count
# Per-round summary built from non-team-kills: map name, the team recorded as
# round winner, and the name of the team on the T side (the attacker's team when
# the last kill of the round was made by a T-side player, else the victim's team).
wins <- kills %>%
  filter(attackerTeam != victimTeam) %>%
  group_by(match_id, round_num) %>%
  summarise(
    map = last(map_name),
    winner = last(round_won_by),
    T_team = if_else(
      last(attackerSide) == "T",
      last(attackerTeam),
      last(victimTeam)
    )
  )
`summarise()` has grouped output by 'match_id'. You can override using the `.groups` argument.
wins
# One row per match: total rounds (last recorded `round_tot` plus one), both
# team names, the winner of the last recorded round (used as match winner),
# and the other team as `looser`.
wins <- kills %>%
  group_by(match_id) %>%
  summarise(
    round_tot = last(round_tot) + 1,
    team1 = last(match_team1),
    team2 = last(match_team2),
    winner = last(round_won_by),
    looser = if_else(winner != team1, team1, team2)
  )
# Reconstruct the final score from the round total:
#   - fewer than 31 rounds: the winner has 16;
#   - otherwise 19, plus 3 for every further block of 6 rounds
#     (presumably 3-round-per-half overtimes; confirm against the event rules).
# `score_diff` is the winner's margin normalised by the number of rounds.
wins <- wins %>%
  mutate(
    final_score_winner = if_else(
      round_tot < 31,
      16,
      19 + 3 * ((round_tot - 31) %/% 6)
    ),
    final_score_looser = round_tot - final_score_winner,
    score_diff = (final_score_winner - final_score_looser) / round_tot
  )
wins
kills
# One row per (event, attacking player): last seen name, team and match date.
# `event` repeats the grouping column `match_event`.
players <- kills %>%
  group_by(match_event, attackerSteamID) %>%
  summarise(
    name = last(attackerName),
    team = last(attackerTeam),
    date = last(match_date),
    event = last(match_event)
  )
`summarise()` has grouped output by 'match_event'. You can override using the `.groups` argument.
players
Team stats
kills
# Team stats on kills: one row per (match, attacking team), team kills excluded.
#   kills_tot                 number of kills
#   kpr                       kills per round
#   team                      copy of attackerTeam (shared key for later joins)
#   hs_percent                share of kills that were headshots
#   attacker_blinded_percent  share of kills made while the attacker was blinded
#   blinded_percent           share of kills on a blinded victim
#   first_kill_percent        first kills per round
#   non_traded_percent        share of kills that were not trades
#   assist_percent            assisted kills per round
# The original assigned `blinded_percent` twice, so the attacker-blinded value
# was silently overwritten by the victim-blinded one. `blinded_percent` keeps
# the value that survived (victim blinded) so downstream code is unaffected,
# and the lost statistic is exposed under its own name. The verbatim duplicate
# of `first_kill_percent` is removed.
team_kills <- kills %>%
  filter(attackerTeam != victimTeam) %>%
  group_by(match_id, attackerTeam) %>%
  summarise(
    kills_tot = n(),
    kpr = n() / last(round_tot),
    team = last(attackerTeam),
    hs_percent = sum(isHeadshot == TRUE) / n(),
    attacker_blinded_percent = sum(attackerBlinded == TRUE) / n(),
    blinded_percent = sum(victimBlinded == TRUE) / n(),
    first_kill_percent = sum(isFirstKill == TRUE) / last(round_tot),
    non_traded_percent = sum(isTrade == FALSE) / n(),
    assist_percent = sum(!is.na(assisterTeam)) / last(round_tot)
  )
`summarise()` has grouped output by 'match_id'. You can override using the `.groups` argument.
team_kills
# Team stats on damage: one row per (match, attacking team), friendly fire
# excluded. `adr` is total HP damage divided by the round count; `strafing` is
# the share of damage events where the attacker was flagged as strafing.
team_damages <- damages %>%
  filter(attackerTeam != victimTeam) %>%
  group_by(match_id, attackerTeam) %>%
  summarise(
    adr = sum(hpDamageTaken) / last(round_tot),
    team = last(attackerTeam),
    strafing = sum(attackerStrafe == TRUE) / n()
  )
`summarise()` has grouped output by 'match_id'. You can override using the `.groups` argument.
# Team stats on flashes: display the raw flash events first.
flashes
# One row per (match, flashing team), keeping only flashes where the affected
# player is on a different team. `avg_flash_duration` is the summed flash
# duration divided by the round count.
team_flashes <- flashes %>%
  filter(attackerTeam != playerTeam) %>%
  group_by(match_id, attackerTeam) %>%
  summarise(
    avg_flash_duration = sum(flashDuration) / last(round_tot),
    team = last(attackerTeam)
  )
`summarise()` has grouped output by 'match_id'. You can override using the `.groups` argument.
# Team stats on grenades: one row per (match, throwing team). `throwerTeam` is
# renamed to `attackerTeam` so the table shares its key columns with the other
# team tables. Counts are per round; `attack_grenades_round` covers every
# grenade type other than "Smoke Grenade".
team_grenades <- grenades %>%
  rename(attackerTeam = throwerTeam) %>%
  group_by(match_id, attackerTeam) %>%
  summarise(
    grenades_per_round = n() / last(round_tot),
    team = last(attackerTeam),
    smokes_per_round = sum(grenadeType == "Smoke Grenade") / last(round_tot),
    attack_grenades_round = sum(grenadeType != "Smoke Grenade") / last(round_tot),
    event = last(match_event)
  )
`summarise()` has grouped output by 'match_id'. You can override using the `.groups` argument.
# Join everything into one row per (match, team).
# Keys are spelled out instead of relying on natural joins, so a column that
# later happens to share a name across tables cannot silently change the join.
# The match-level `wins` table is joined on the match only; the per-team tables
# are joined on match + team.
team_keys <- c("match_id", "attackerTeam", "team")
team_killsstat <- team_kills %>%
  inner_join(wins, by = "match_id") %>%
  inner_join(team_damages, by = team_keys) %>%
  inner_join(team_flashes, by = team_keys) %>%
  inner_join(team_grenades, by = team_keys)
Joining, by = "match_id"
Joining, by = c("match_id", "attackerTeam", "team")
Joining, by = c("match_id", "attackerTeam", "team")
Joining, by = c("match_id", "attackerTeam", "team")
# Create the y-axis data: the normalised score difference seen from each team's
# side, positive for the match winner and negative for the other team.
team_killsstat <- team_killsstat %>%
  mutate(
    relative_score_diff = if_else(
      winner == attackerTeam,
      score_diff,
      -score_diff
    )
  )
PCA
# Columns of the joined team table fed into the PCA.
pca.variables <- c(
  "kills_tot",
  "kpr",
  "hs_percent",
  "blinded_percent",
  "first_kill_percent",
  "non_traded_percent",
  "assist_percent",
  "strafing",
  "adr",
  "avg_flash_duration",
  "grenades_per_round"
)
# Centre and scale each variable so the PCA is not dominated by the variables
# with the largest raw scale.
team_killsstat.pca <- prcomp(
  team_killsstat[, pca.variables],
  center = TRUE,
  scale. = TRUE
)
summary(team_killsstat.pca)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11
Standard deviation 2.0433 1.2503 1.00724 0.99353 0.91287 0.85210 0.7438 0.7236 0.56743 0.5307 0.14189
Proportion of Variance 0.3795 0.1421 0.09223 0.08974 0.07576 0.06601 0.0503 0.0476 0.02927 0.0256 0.00183
Cumulative Proportion 0.3795 0.5217 0.61390 0.70364 0.77939 0.84540 0.8957 0.9433 0.97257 0.9982 1.00000
library(devtools)
Warning: package ‘devtools’ was built under R version 4.1.2
Loading required package: usethis
Warning: package ‘usethis’ was built under R version 4.1.2
# Install ggbiplot from GitHub only when it is missing, so re-running the
# notebook does not rebuild the package every time. `upgrade = "never"` skips
# the interactive "Which would you like to update?" prompt, which would block a
# non-interactive render.
if (!requireNamespace("ggbiplot", quietly = TRUE)) {
  install_github("vqv/ggbiplot", upgrade = "never")
}
WARNING: Rtools is required to build R packages, but is not currently installed.
Please download and install Rtools 4.0 from https://cran.r-project.org/bin/windows/Rtools/.
Downloading GitHub repo vqv/ggbiplot@HEAD
These packages have more recent versions available.
It is recommended to update all of them.
Which would you like to update?
1: All
2: CRAN packages only
3: None
4: rlang (0.4.11 -> 0.4.12) [CRAN]
5: glue (1.4.2 -> 1.5.1 ) [CRAN]
6: crayon (1.4.1 -> 1.4.2 ) [CRAN]
7: pillar (1.6.2 -> 1.6.4 ) [CRAN]
8: lifecycle (1.0.0 -> 1.0.1 ) [CRAN]
9: withr (2.4.2 -> 2.4.3 ) [CRAN]
10: tibble (3.1.4 -> 3.1.6 ) [CRAN]
11: digest (0.6.27 -> 0.6.29) [CRAN]
WARNING: Rtools is required to build R packages, but is not currently installed.
Please download and install Rtools 4.0 from https://cran.r-project.org/bin/windows/Rtools/.
√ checking for file 'C:\Users\nguye\AppData\Local\Temp\Rtmp0Ef7Kg\remotes4274ef94be9\vqv-ggbiplot-7325e88/DESCRIPTION' (406ms)
- preparing 'ggbiplot':
checking DESCRIPTION meta-information ...
√ checking DESCRIPTION meta-information
- checking for LF line-endings in source and make files and shell scripts
- checking for empty or unneeded directories
- looking to see if a 'data/datalist' file should be added
- building 'ggbiplot_0.55.tar.gz'
Installing package into ‘C:/Users/nguye/OneDrive/Documents/R/win-library/4.1’
(as ‘lib’ is unspecified)
* installing *source* package 'ggbiplot' ...
** using staged installation
** R
** data
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
converting help for package 'ggbiplot'
finding HTML links ... done
ggbiplot html
ggscreeplot html
wine html
** building package indices
** testing if installed package can be loaded from temporary location
*** arch - i386
*** arch - x64
** testing if installed package can be loaded from final location
*** arch - i386
*** arch - x64
** testing if installed package keeps a record of temporary installation path
* DONE (ggbiplot)
We now color the points so that teams that won the match appear in one color and teams that lost appear in another. We also make the arrows stand out a bit more and add the correlation circle.
# Biplot of the team PCA with the correlation circle.
# The table has no `win` column, so `team_killsstat$win` was NULL and the points
# were never coloured. The grouping is derived here from the columns that do
# exist: a row is a win when the match winner is that row's team.
match_result <- if_else(
  team_killsstat$winner == team_killsstat$attackerTeam,
  "Won",
  "Lost"
)
p <- ggbiplot(
  team_killsstat.pca,
  obs.scale = 1,
  var.scale = 1,
  circle = TRUE,
  groups = match_result,
  varname.adjust = 1.7,
  varname.size = 2.3,
  alpha = 0.7
)
Warning: Unknown or uninitialised column: `win`.
# Restrict both axes to the region of interest, add a centred title and label
# the colour legend, then draw the plot.
p <- p +
  lims(x = c(-4.5, 3), y = c(-3.1, 5)) +
  labs(title = "PCA of team statistics in matches", colour = "Match result") +
  theme(plot.title = element_text(hjust = 0.5))
p
Warning: Removed 97 rows containing missing values (geom_point).